
* +++++++++++++++++++++
* CLEAN CRIME DATA
* +++++++++++++++++++++

/*
Polizeiliche Kriminalstatistik (crime reports)

source: https://www.bka.de/DE/AktuelleInformationen/StatistikenLagebilder/PolizeilicheKriminalstatistik/PKS2019/PKSTabellen/KreisTV/kreisTV_node.html
*/ 

* globals
* folder with the raw crime files, one subfolder per year (see the import loop below);
* built from the data_raw global, which is not defined in this file and is presumably
* set by a master/setup do-file -- TODO confirm
global data_raw_crime "${data_raw}/crime/"


* loop over files from various years
* For each year 2014-2019: import the raw county table, keep three crime
* categories, convert the figures to numeric, copy the violent-crime and theft
* figures into their own columns, and store one row per county (ags) in a
* tempfile named crime_YYYY.
forval year = 2014/2019 { 
	* the raw file name differs between releases
	if inlist(`year', 2014, 2015)			local file "tb01_FaelleGrundtabelleKreise_csv"
	if inlist(`year', 2016, 2017, 2018)		local file "BKA-LKS-F-03-T01-Kreise_csv"
	if `year' == 2019						local file "KR-F-01-T01-Kreise-Faelle-HZ_csv"
	import delimited "${data_raw_crime}/`year'//`file'.csv", clear

	* focus on selected types of crimes
	keep if v2 == "Straftaten insgesamt" | v2 == "Gewaltkriminalität" /// 
		| v2 == "Diebstahl insgesamt und zwar:"
	
	* the 2014 file looks slightly different, so shift its columns onto the
	* positions used by the later years to make the rest of the loop work
	if `year' == 2014 {
		keep v2 v3 v4 v5 v11 v12 v13 v14 v15 v16
		ren (v5 v11 v12 v13 v14 v15 v16) (v6 v12 v13 v14 v15 v16 v17)
	}
	else {
		keep v2 v3 v4 v6 v12 v13 v14 v15 v16 v17
	}

	* convert the figures to numeric. ignore(",") removes every comma in a value
	* (a first-match-only replacement would leave values with two separators
	* as strings) and columns that are already numeric are left untouched
	destring v6-v17, replace ignore(",")
	
	* give meaningful names to variables
	ren (v2 v3 v4 v6 v12 v13 v14 v15 v16 v17) ///
		(type ags place_name cases cases_cleared clearance_rate ///
		suspects_tot suspects_m suspects_f suspects_for)
	destring ags, replace	
	
	* number of german suspects: residual from number of all suspects minus foreign suspects
	gen suspects_de = suspects_tot - suspects_for
	
	* add columns for specific types of crime without doing a full reshape:
	* within each county, take the value from the row of the given crime type
	* and copy it to all rows of that county
	foreach var in cases suspects_tot suspects_m suspects_f suspects_for suspects_de {
		bys ags : egen `var'_vio_crime = mean(cond(type == "Gewaltkriminalität", `var', .))
		bys ags : egen `var'_theft = mean(cond(type == "Diebstahl insgesamt und zwar:", `var', .))
	}

	* only the overall-crime row is kept; the other two types now live in columns
	keep if type == "Straftaten insgesamt"
	drop type
	
	* clean up
	gen year = `year'
	order ags year, first
	* stop if a county appears more than once
	isid ags
	tempfile crime_`year'
	save `crime_`year''
}

* stack the yearly tempfiles into one county-year panel
use `crime_2014', clear
foreach yr of numlist 2015/2019 {
	append using `crime_`yr''
}
sort ags year

* attach population counts
* remember the names of the crime variables first, so that the population
* columns brought in by the merge can be dropped again further down
unab varlist : _all

* only county-years found in both data sets are kept
merge 1:1 ags year using "${data_derived}/kreis_total_pop_by_age_gender.dta", ///
	nogen assert(master using match) keep(match)

* suspects per relevant population
* each count is divided by the population group it refers to; the german
* population is taken as total population minus foreign population
foreach g in m f { 
	gen suspects_per_pop_`g'_tot = suspects_`g' / pop_`g'_tot
}

gen suspects_per_pop_for = suspects_for / pop_for_tot
gen suspects_per_pop_de = suspects_de / (pop_tot - pop_for_tot)
gen suspects_per_pop_tot = suspects_tot / pop_tot
gen cases_per_pop = cases / pop_tot

* same rates by type of crime. The new variables keep the short suffix vio_cr,
* but the count variables carry the full suffix vio_crime, so the source name
* is spelled out instead of relying on variable-name abbreviation
foreach t in vio_cr theft { 
	local src `t'
	if "`t'" == "vio_cr"	local src vio_crime
	gen suspects_per_pop_for_`t' = suspects_for_`src' / pop_for_tot
	gen suspects_per_pop_de_`t' = suspects_de_`src' / (pop_tot - pop_for_tot)
	gen suspects_per_pop_tot_`t' = suspects_tot_`src' / pop_tot
	gen cases_per_pop_`t' = cases_`src' / pop_tot
}

* keep the crime variables and the per-population rates only
keep `varlist' *per_pop* 

* reshape wide on year
* every measure gets a trailing underscore so that the wide variables are
* named measure_YYYY; built-in rename is used so that no user-written
* command is required, and the stubs are listed explicitly instead of
* through a range that depends on the column order
ds ags year place_name, not
local measures `r(varlist)'
local stubs
foreach v of local measures {
	rename `v' `v'_
	local stubs `stubs' `v'_
}
reshape wide `stubs', i(ags) j(year)
order ags place_name, first

* abbreviate the suspects_ prefix to susp_
rename suspects_* susp_*

* relative change between 2014 and 2019, overall and by type of crime
foreach base in susp_per_pop_de susp_per_pop_for cases_per_pop {
	local stems `base' `base'_vio_cr `base'_theft
	foreach stem of local stems {
		gen pct_chg_`stem' = (`stem'_2019 - `stem'_2014)/`stem'_2014
	}
}

* natural log of every variable whose name starts with cases_
unab case_vars : cases_*
foreach v of local case_vars {
	gen log_`v' = ln(`v')
}

* write the county-level file
save "${data_derived}/suspects_by_nat_by_kreis.dta", replace
